Linear Regression Uncertainty¶
In this demo, we will perform linear regression using the Normal Equation from linear algebra. The Normal Equation gives the best-fit line for a dataset in closed form, by solving for the parameters analytically rather than iteratively.
Normal Equation¶
The equation to find the parameters (weights) of the linear model is:
$\beta = (X^TX)^{-1} X^T y$
Where:
- $X$ is the matrix of input features (with a column of ones for the bias term).
- $y$ is the vector of output values.
- $\beta$ is the vector of parameters (the coefficients for the regression line).
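For completeness, the normal equation follows from minimizing the sum of squared residuals. Setting the gradient of the objective to zero yields the closed-form solution:

$$
\min_{\beta}\;\lVert y - X\beta \rVert^2
\quad\Rightarrow\quad
\nabla_{\beta}\,\lVert y - X\beta \rVert^2 = -2X^T(y - X\beta) = 0
\quad\Rightarrow\quad
X^TX\beta = X^Ty
$$

so $\beta = (X^TX)^{-1}X^Ty$, provided $X^TX$ is invertible.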
Steps:¶
- Data Generation: We generate some random linear data with a bit of noise for demonstration purposes.
- Bias Term: We add a bias term (a column of ones) to the feature matrix to account for the intercept in the linear regression.
- Normal Equation: We compute the best-fit parameters (intercept and slope) using the normal equation.
- Visualization: We plot the data points and the best-fit line to visually illustrate the linear regression.
Absence of Random Noise (Residuals)¶
import numpy as np
import matplotlib.pyplot as plt
# Generate some random data for demonstration
np.random.seed(0)
X = 2 * np.linspace(0,2,50).reshape(50,1)
e = 0  # no noise; replace with np.random.randn(50, 1) to add noise
y = 4 + 3 * X + e
# Plot the data and the fitted regression line
plt.scatter(X, y, color="blue", label="Data Points")
#plt.plot(X_new, y_predict, color="red", label="Best Fit Line")
plt.xlabel("X")
plt.ylabel("y")
plt.legend()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
# Generate some random data for demonstration
np.random.seed(0)
X = 2 * np.linspace(0,2,50).reshape(50,1)
e = 0  # no noise; replace with np.random.randn(50, 1) to add noise
y = 4 + 3 * X + e
# Add a bias (ones) column to X
X_b = np.c_[np.ones((50, 1)), X]
# Compute theta using the normal equation
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
# Extract the parameters (intercept and slope)
intercept, slope = theta_best[0][0], theta_best[1][0]
print(f"Intercept: {intercept}, Slope: {slope}")
# Predictions based on the fitted model
X_new = np.array([[0], [4]])  # Predict at the endpoints x = 0 and x = 4
X_new_b = np.c_[np.ones((2, 1)), X_new] # Add bias term
y_predict = X_new_b.dot(theta_best)
# Plot the data and the fitted regression line
plt.scatter(X, y, color="blue", label="Data Points")
plt.plot(X_new, y_predict, color="red", label="Best Fit Line")
plt.xlabel("X")
plt.ylabel("y")
plt.legend()
plt.show()
Intercept: 3.9999999999999964, Slope: 3.0000000000000027
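As a side note (not part of the original notebook), explicitly inverting `X_b.T @ X_b` with `np.linalg.inv` can be numerically fragile when the matrix is ill-conditioned. `np.linalg.lstsq` solves the same least-squares problem more stably. A minimal sketch on the same noiseless data:

```python
import numpy as np

X = 2 * np.linspace(0, 2, 50).reshape(50, 1)
y = 4 + 3 * X  # noiseless: y = 4 + 3x exactly
X_b = np.c_[np.ones((50, 1)), X]

# lstsq minimizes ||X_b @ theta - y||^2 without forming (X^T X)^{-1}
theta, residuals, rank, sv = np.linalg.lstsq(X_b, y, rcond=None)
print(theta.ravel())  # recovers intercept 4 and slope 3
```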
Presence of Random Noise (Residuals)¶
Run the cell below a few times and observe how the resulting coefficients change from run to run.
X = 2 * np.linspace(0,2,50).reshape(50,1)
e = np.random.randn(50, 1)*20
y = 4 + 3 * X + e
# Add a bias (ones) column to X
X_b = np.c_[np.ones((50, 1)), X]
# Compute theta using the normal equation
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
# Extract the parameters (intercept and slope)
intercept, slope = theta_best[0][0], theta_best[1][0]
print(f"Intercept: {intercept}, Slope: {slope}")
# Predictions based on the fitted model
X_new = np.array([[0], [4]])  # Predict at the endpoints x = 0 and x = 4
X_new_b = np.c_[np.ones((2, 1)), X_new] # Add bias term
y_predict = X_new_b.dot(theta_best)
# Plot the data and the fitted regression line
plt.scatter(X, y, color="blue", label="Data Points")
plt.plot(X_new, y_predict, color="red", label="Best Fit Line")
plt.xlabel("X")
plt.ylabel("y")
plt.legend()
plt.show()
Intercept: 6.728691848395206, Slope: 2.3908504252502185
Simulation¶
If we repeat the above simulation 100 times, we will get 100 different sets of coefficients.
theta_bests = []
for _ in range(100):
    X = 2 * np.linspace(0, 2, 50).reshape(50, 1)
    e = np.random.randn(50, 1) * 20
    y = 4 + 3 * X + e
    # Add a bias (ones) column to X
    X_b = np.c_[np.ones((50, 1)), X]
    # Compute theta using the normal equation
    theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
    # Extract the parameters (intercept and slope)
    intercept, slope = theta_best[0][0], theta_best[1][0]
    print(f"Intercept: {intercept}, Slope: {slope}")
    # Predictions at the endpoints x = 0 and x = 4
    X_new = np.array([[0], [4]])
    X_new_b = np.c_[np.ones((2, 1)), X_new]  # Add bias term
    y_predict = X_new_b.dot(theta_best)
    theta_bests.append(theta_best)
    plt.plot(X_new, y_predict, color="red", alpha=0.2)
# Plot the data and the fitted regression line
plt.scatter(X, y, color="blue", label="Data Points")
plt.xlabel("X")
plt.ylabel("y")
#plt.legend()
plt.show()
Intercept: 11.882434798096833, Slope: -0.3734247609031549
Intercept: 15.49690728330473, Slope: -1.1998199107316765
Intercept: 3.3409504528228138, Slope: 2.8509662570655303
... (97 more Intercept/Slope lines omitted)
Coefficient Distributions & Uncertainty¶
import seaborn as sns
theta_0 = [theta[0][0] for theta in theta_bests]
sns.histplot(data=theta_0, kde=True)
plt.title(f'Theta_0 Mean: {np.mean(theta_0):.2f} Std: {np.std(theta_0):.2f}')
Text(0.5, 1.0, 'Theta_0 Mean: 4.99 Std: 5.66')
import numpy as np
theta_1 = [theta[1][0] for theta in theta_bests]
sns.histplot(data=theta_1, kde=True)
plt.title(f'Theta_1 Mean: {np.mean(theta_1):.2f} Std: {np.std(theta_1):.2f}')
Text(0.5, 1.0, 'Theta_1 Mean: 2.73 Std: 2.29')
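As a sanity check (an addition, not in the original notebook), the simulated spread of the coefficients can be compared against the analytic sampling covariance of the OLS estimator, $\sigma^2 (X^TX)^{-1}$, using the known noise standard deviation $\sigma = 20$:

```python
import numpy as np

sigma = 20  # noise std used in the simulation above
X = 2 * np.linspace(0, 2, 50).reshape(50, 1)
X_b = np.c_[np.ones((50, 1)), X]

# Sampling covariance of the OLS estimator: sigma^2 * (X^T X)^{-1}
cov = sigma**2 * np.linalg.inv(X_b.T @ X_b)
se_intercept, se_slope = np.sqrt(np.diag(cov))
print(f"SE(intercept) = {se_intercept:.2f}, SE(slope) = {se_slope:.2f}")
# roughly 5.57 and 2.40, close to the simulated 5.66 and 2.29
```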
Thought: What happens to the coefficient distributions if we reduce the random noise?¶
TODO: Repeat the above simulation with smaller random noises and observe.
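One possible sketch for this TODO (assuming we simply shrink the noise scale from 20 to 2): the coefficient distributions should tighten roughly in proportion to the noise.

```python
import numpy as np

intercepts, slopes = [], []
for _ in range(100):
    X = 2 * np.linspace(0, 2, 50).reshape(50, 1)
    e = np.random.randn(50, 1) * 2  # noise std reduced from 20 to 2
    y = 4 + 3 * X + e
    X_b = np.c_[np.ones((50, 1)), X]
    theta = np.linalg.inv(X_b.T @ X_b) @ X_b.T @ y
    intercepts.append(theta[0, 0])
    slopes.append(theta[1, 0])

# Stds should shrink ~10x relative to the noisy run (5.66 and 2.29)
print(f"Std(intercept): {np.std(intercepts):.2f}, Std(slope): {np.std(slopes):.2f}")
```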
X_new = np.array([[0]])  # Predict at x = 0
X_new_b = np.c_[np.ones((1, 1)), X_new]  # Add bias term
y_new = []
for theta_best in theta_bests:
    y_predict = X_new_b.dot(theta_best)
    y_new.append(y_predict)
Predicting at $x=5$¶
X_new = np.array([[5]])  # Predict at x = 5
X_new_b = np.c_[np.ones((1, 1)), X_new]  # Add bias term
y_new = []
for theta_best in theta_bests:
    y_predict = X_new_b.dot(theta_best)
    y_new.append(y_predict.item())
y_new[:10]
[19.513844232619803, 18.962031902673278, 18.09117117037552, 19.084034670845064, 19.090722553864552, 19.409551807117236, 18.724285754248733, 19.326003613287075, 18.84933096981272, 19.1031452638218]
sns.histplot(data=y_new, kde=True)
plt.title(f'Prediction Mean: {np.mean(y_new):.2f} Std: {np.std(y_new):.2f}')
Text(0.5, 1.0, 'Prediction Mean: 18.99 Std: 0.36')
How to present this prediction distribution ?¶
P10 (low), P50 (base), P90 (high)
sns.ecdfplot(data=y_new)
plt.title(f'Prediction Uncertainty: P10: {np.percentile(y_new,10):.2f} P50: {np.percentile(y_new,50):.2f} P90: {np.percentile(y_new,90):.2f}')
Text(0.5, 1.0, 'Prediction Uncertainty: P10: 18.53 P50: 19.04 P90: 19.44')
The Overall Uncertainty¶
The total uncertainty must also include the variance of the random noise (Data Uncertainty, or Aleatoric Uncertainty), in addition to the estimation uncertainty in the coefficients discussed above (Model Uncertainty, or Epistemic Uncertainty).
Rerun the above prediction at $x=5$ by considering both model and data uncertainty.
X_new = np.array([[5]])  # Predict at x = 5
X_new_b = np.c_[np.ones((1, 1)), X_new]  # Add bias term
y_new = []
for theta_best in theta_bests:
    # Add a fresh noise draw to each prediction (data uncertainty).
    # Note: the simulation generated data with noise std 20; the std of 1
    # used here is much smaller, so the data term is illustrative only.
    y_predict = X_new_b.dot(theta_best) + np.random.randn(1, 1) * 1
    y_new.append(y_predict.item())
sns.histplot(data=y_new, kde=True)
plt.title(f'Prediction Mean: {np.mean(y_new):.2f} Std: {np.std(y_new):.2f}')
Text(0.5, 1.0, 'Prediction Mean: 19.03 Std: 1.04')
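As a closing check (an addition, not from the original notebook): when the model and data uncertainty sources are independent, their variances add, so the total standard deviation should be roughly the quadrature sum of the two. A sketch with the numbers above (model std of about 0.36 at x = 5, added noise std of 1):

```python
import numpy as np

model_std = 0.36   # spread of predictions at x = 5 from coefficient uncertainty
noise_std = 1.0    # std of the noise added per prediction above

# Independent variances add, so stds combine in quadrature
total_std = np.sqrt(model_std**2 + noise_std**2)
print(f"Expected total std = {total_std:.2f}")  # about 1.06, close to the observed 1.04
```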